Data import

QA band filtering

Read QA band

filtering function

# high confidence clouds or high confidence cloud shadows or fill values
mask_medconf <- function(x){
  bs <- intToBits(x)
  if ( ((bs[1]) | # cloud
        (bs[3]) | # shadow
        (bs[4]) | # snow
        (bs[5]) | # water
        (bs[6]) | # aerosol (only low quality)
        (bs[8]) | # subzero
        (bs[9]) | # saturation
        (bs[10])) # High sun zenith flag
       == 1){
    return("flag") } else {
      return("valid")
    }
}



# due to the nature of the intToBits function it is not possible to perform 
# this step in a mutate pipe. Therefor a vector is created via lapply, 
# joined to then joined to the fs_LND df and finally flaged pixels can be removed
mask_idex <- lapply(raw_fs$QAI, mask_medconf) %>% unlist()

fs_LND_filtered <- raw_fs %>%
  mutate(QAI = mask_idex) %>% 
  filter(QAI == "valid")

Katjas method compairson

# Katja´s method to filter QAI values
raw_fs_KK <- raw_fs

raw_fs_KK$qa_cloud <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 1), 3)
raw_fs_KK$qa_shadow <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 3), 1)
raw_fs_KK$qa_snow <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 4), 1)
raw_fs_KK$qa_water <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 5), 1)
raw_fs_KK$qa_aerosol <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 6), 3)
raw_fs_KK$qa_subzero <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 8), 1)
raw_fs_KK$qa_saturation <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 9), 1)
raw_fs_KK$qa_zenith <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 10), 1)
raw_fs_KK$qa_illumination <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 11), 3)
raw_fs_KK$qa_slope <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 13), 1)
raw_fs_KK$qa_vapor <- bitwAnd(bitwShiftR(raw_fs_KK$QAI, 14), 1)


raw_fs_KK$quality <- 0
raw_fs_KK$quality[raw_fs_KK$qa_cloud != 0 |raw_fs_KK$qa_snow != 0 |raw_fs_KK$qa_shadow != 0] <- 1
raw_fs_KK <- raw_fs_KK[raw_fs_KK$quality == 0, ]
print(paste("number of observations following SHS method for QAI filtering:", round(nrow(fs_LND_filtered)/6)) )
## [1] "number of observations following SHS method for QAI filtering: 79063"
print(paste("number of observations following KK method QAI filtering:", round(nrow(raw_fs_KK)/6)) )
## [1] "number of observations following KK method QAI filtering: 74693"
print(paste("The difference in included observation:", round(nrow(fs_LND_filtered)/nrow(raw_fs_KK), digits = 2), "% (n=",  (nrow(fs_LND_filtered)-nrow(raw_fs_KK)),  ")") )
## [1] "The difference in included observation: 1.06 % (n= 26220 )"

As seen above the differences in the filtering methods and parameterization is very marginal, at ~1% difference in amount of included observations

##Additional data filtering and auxilirary varaible and indicity computation

fs_LND <- fs_LND_filtered %>% 
  # reduce file size for testing
  #sample_n(10000) %>% 
  # filter out extreme values
  filter(across(BLUE:SWIR2, ~ . > 0),
         across(BLUE:SWIR2, ~ . < 10000)) %>% 
  mutate(date   = as.Date(str_sub(scene, 1, 9),format = "%Y%m%d"),
         month  = lubridate::month(date),
         year   = lubridate::year(date), 
         month_year = paste0(month,"_",year),
         doy    = lubridate::yday(date),
         sensor = str_sub(scene, -5, -1),
         SWIR_ratio = SWIR2/SWIR1,
         NDVI   = ((NIR-RED)/(NIR+RED)),
         NDTI   = ((SWIR1-SWIR2)/(SWIR1+SWIR2))) # source: https://www.mdpi.com/2072-4292/10/10/1657/htm
## Warning: Using `across()` in `filter()` is deprecated, use `if_any()` or
## `if_all()`.

## Warning: Using `across()` in `filter()` is deprecated, use `if_any()` or
## `if_all()`.

spectra vis

NDVI time series

ggplot(fs_LND, aes(doy, NDVI, color=year, group=year)) +
  geom_smooth() +
  scale_colour_viridis_c(option = "D") +
  theme_minimal()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Here it can be seen that over time, average NDVI maximum values tend to increase over time, particularly early in the year before the growing season. Over such a long time frame it is possible though that this chance could be attributed to more irrigation, climate change, or LULC rather been strictly being resultant of deviations in sensor spec?

NDTI time series

ggplot(na.omit(fs_LND), aes(doy, NDTI, color=year, group=year)) +
  geom_smooth() +
  scale_colour_viridis_c(option = "D") +
  theme_minimal()
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Similar story here, with NDTI decreasing over time

data wrangel into long

fs_LND_long <- fs_LND %>% 
  pivot_longer(cols=c("BLUE":"SWIR2"),names_to = "wavelength", values_to = "reflectance") %>% 
  as.data.frame()  %>% 
    mutate(wavelength_num = case_when(wavelength == "BLUE" ~ 482,
                                      wavelength == "GREEN" ~ 562,
                                      wavelength == "RED" ~ 655,
                                      wavelength == "NIR" ~ 865,
                                      wavelength == "SWIR1" ~ 1610,
                                      wavelength == "SWIR2" ~ 2200),
            reflectance = reflectance/100) # scale to percent

simple mean reflectance over time

ggplot(fs_LND_long, aes(wavelength_num, reflectance, color=year, group=year)) +
   # geom_smooth(formula = y ~ s(x, bs = "cs", k=6)) +
   stat_summary(fun=mean, geom="line", size = 1) + # draw a mean line in the data
  scale_colour_viridis_c(option = "D") +
  theme_minimal()

When plotting a simple mean of spectra across years it can be seen that more recent years dont defaco have higher reflectance values across the entire electromagnetic spectrum

boxplot

ggplot(fs_LND_long, aes(wavelength, reflectance, color=sensor)) +
 # geom_jitter() +
  geom_boxplot() +
  scale_colour_viridis_d(option = "D") +
  theme_minimal()

Looks like lots of extreme values (even post QAI filter), but when looking at the next density plot section, you can that relatively speaking, these extreme outlying values are exceedinly rare and marginal.

density

# facet wrap by wavelength
ggplot(fs_LND_long, aes(reflectance, color=sensor, fill=sensor)) +
  geom_density(alpha = 0.05) +
  #geom_jitter() +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  scale_x_continuous(expand = c(0, 0)) +
  #scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
  theme_minimal() +
  guides(col = guide_legend(nrow = 3))+
  theme(legend.position = "bottom")  +
  facet_wrap(~wavelength)

# facet wrap by sensor
ggplot(fs_LND_long, aes(reflectance, color=wavelength, color=wavelength)) +
  geom_density(alpha = 0.05) +
  #geom_jitter() +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  scale_x_continuous(expand = c(0, 0)) +
  #scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
  theme_minimal() +
  guides(col = guide_legend(nrow = 3))+
  theme(legend.position = "bottom")  +
  facet_wrap(~sensor)
## Warning: Duplicated aesthetics after name standardisation: colour

ridges

ggplot(fs_LND_long, aes(x = reflectance, y = sensor, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 1.2, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "reflectance", option = "D") +
  scale_x_continuous(expand = c(0, 0), limits = c(0, 15000)) +
  theme_minimal() +
  facet_wrap(~wavelength)
## Picking joint bandwidth of 21.2
## Picking joint bandwidth of 23.4
## Picking joint bandwidth of 116
## Picking joint bandwidth of 34.6
## Picking joint bandwidth of 55.5
## Picking joint bandwidth of 43.4

ggplot(fs_LND_long, aes(x = reflectance, y = wavelength, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "reflectance", option = "D") +
  scale_x_continuous(expand = c(0, 0), limits = c(0, 15000)) +
  theme_minimal() +
  facet_wrap(~sensor)
## Picking joint bandwidth of 65.6
## Picking joint bandwidth of 33.9
## Picking joint bandwidth of 33.8
## Picking joint bandwidth of 39.3
## Picking joint bandwidth of 72.2

feature space vis

import reference data

reference_spectra <- read.csv("data/feature_space/sli_gen_dark_soils_0p4.csv",
                              encoding = "UTF-8") %>% 
  mutate(SWIR_ratio = SWIR2/SWIR1,
         NDVI   = ((NIR-RED)/(NIR+RED)))

ggplot(reference_spectra, aes(NDVI, SWIR_ratio, color = cover)) +
  geom_point() +
  scale_colour_viridis_d(option = "D") +
  theme_minimal() 

plot complete data

# No dissagregation
ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 300) +
  geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2))
## Warning: Removed 55 rows containing non-finite values (stat_binhex).

Here we can see that there are two dense zones in the spectral feature spaces. The smaller and fainter zone is predominately populated by landsat 7 pixels (clearly seen in the following plot)

facet by sensor

ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 200) +
  geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
  facet_wrap(~sensor)
## Warning: Removed 55 rows containing non-finite values (stat_binhex).

Here we can see that the spectral library fits particularly well to Landsat 8 (probably do to the fact that is was created using lansat 8 pixels?).

facet by month

ggplot(fs_LND, aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 100) +
  geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
  facet_wrap(~month)
## Warning: Removed 55 rows containing non-finite values (stat_binhex).

Here we can also see that across all sensors, the spectral library seems to triangulate the data points better during growing season months.

facet by year

Displayed with only point per reference class to improve interpretability

1985-2000

# FROM 1985-2000
fs_LND |> filter(year<2001) %>%
ggplot(., aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 100) +
  geom_point(data=reference_spectra[c(1,3,25),], aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
  facet_wrap(~year)

2001-2022

# FROM 2001-2022
fs_LND |> filter(year>2000) %>%
ggplot(., aes(NDVI, SWIR_ratio)) +
  stat_bin_hex(bins = 100) +
  geom_point(data=reference_spectra[c(1,3,25),], aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
  facet_wrap(~year)

NDTI feature space

ggplot(fs_LND, aes(NDTI, SWIR_ratio)) +
  geom_point() +
 # stat_bin_hex(bins = 10) +
  #geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal()# +

#  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
#  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) 

NDTI is the inverse of the SWIR ratio. Therefor the conventional feature space comparison inst really appropriate.

ggplot(fs_LND, aes(NDTI, RED/GREEN)) +
  stat_bin_hex(bins = 300) +
 # stat_bin_hex(bins = 10) +
  #geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal()# +

#  scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
#  scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) 

Pick some random ratio of other bands (red green in this case), yields a visually similar plot, with limited interpretability. Maybe there is a specific ratio/comparison that is helpful though.

# old computationally exorbinant method of and plotting density plots 

# # Get density of points in 2 dimensions.
# # @param x A numeric vector.
# # @param y A numeric vector.
# # @param n Create a square n by n grid to compute density.
# # @return The density within each square.
# get_density <- function(x, y, ...) {
#   dens <- MASS::kde2d(x, y, ...)
#   ix <- findInterval(x, dens$x)
#   iy <- findInterval(y, dens$y)
#   ii <- cbind(ix, iy)
#   return(dens$z[ii])
# }
# 
# fs_density <- as.data.frame(fs_LND) %>%
#   filter(SWIR_ratio != "Inf") %>% 
#   mutate(density = get_density((.)$NDVI, (.)$SWIR_ratio, n = 500)) 

# ggplot(fs_density, aes(NDVI, SWIR_ratio, color = density)) +
#   geom_point() +
#   geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
#   scale_colour_viridis_c(option = "D") +
#   theme_minimal() +
#   scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
#   scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
#   facet_wrap(~sensor)

single location band compairison

Overview of observations over time by sensor

Temporal overvire and year resolution

fs_LND |>  
  group_by( year, sensor) %>% 
  summarise(n_observation = n()) %>%  
  
ggplot(., aes(year, n_observation, color=sensor)) +
  geom_line() +
  scale_fill_continuous(type = "viridis") +
  scale_colour_viridis_d(option = "D") +
  theme_minimal() 
## `summarise()` has grouped output by 'year'. You can override using the
## `.groups` argument.

Temporal overview at month resolution

fs_LND_obs_overview <- fs_LND %>% 
  group_by(month, year, sensor) %>% 
  summarise(n_observation = n()) %>%
  mutate(timestamp = zoo::as.yearmon(paste0(month,"_", year), format="%m_%Y")) 
## `summarise()` has grouped output by 'month', 'year'. You can override using the
## `.groups` argument.
ggplot(fs_LND_obs_overview, aes(timestamp, n_observation, color=sensor)) +
  geom_line(size=1) +
  scale_fill_continuous(type = "viridis") +
  scale_colour_viridis_d(option = "D") +
  scale_x_continuous(breaks=seq(1984,2022,2)) +
  theme_minimal()

The acute fluctuations in observation numbers seems to stem from the fact that during winter months there is a lower observation density (plotted below)

observation density by month

(same data with some different vis options)

ggplot(fs_LND_obs_overview, aes(x=month, y=n_observation, group=month,fill=sensor, color=sensor)) +
 # geom_boxplot(alpha=0.3, notch = T) +
  geom_violin(alpha=0.3) +
  geom_jitter(alpha=0.3) +
 #geom_density_ridges_gradient(scale = 1.2, rel_min_height = 0.01) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  theme_minimal() +
  scale_x_continuous(breaks=seq(1,12,1)) +
  scale_y_continuous(limits = c(0, 2000)) +
  facet_wrap(~sensor)

ggplot(fs_LND_obs_overview, aes(x=month, y=n_observation, group=month,fill=sensor, color=sensor)) +
 geom_boxplot(alpha=0.3) +
 # geom_violin(alpha=0.3) +
  geom_jitter(alpha=0.3) +
 #geom_density_ridges_gradient(scale = 1.2, rel_min_height = 0.01) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  theme_minimal() +
  scale_x_continuous(breaks=seq(1,12,1)) +
  scale_y_continuous(limits = c(0, 2000)) +
  facet_wrap(~sensor)

The density of observations in summer months are far greater (although more variable), While during winter months are seem to be constantly few observations. This trend is stable across sensors, so mostly likely can largely attributed to increased cloud/snow cover during winter months.

Exact matches of location and loose match time of observation

fs_LND_loose <- fs_LND_long %>% 
  # filter(sensor %in% c("LND07", "LND08")) %>% 
  mutate(obs_time_place = paste0(plyr::round_any(doy, 2, f = floor),"_", year,"_",POINT_ID)) %>% 
  group_by(obs_time_place) %>% 
  filter(n() > 6)

Total number of observations that match the loose condition of being within ten days of each other: 6.4243^{4}

Days of year with available observations: 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 30, 31, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 330, 331, 332, 333, 334, 335, 338, 339, 340, 341, 342, 343, 346, 347, 348, 349, 350, 351, 354, 355, 356, 357, 362, 363

Sample compairison of LND07 and LND08 spectra

for (i in 1:4) {
  
  scens <- unique(fs_LND_loose$obs_time_place)[c((((i-1)*20)+1):((i)*20))]
  
  fs_LND_loose_subset <- fs_LND_loose %>% 
    filter(obs_time_place %in% scens) |> 
    filter(sensor %in% c("LND07", "LND08")) 
  
  print(ggplot( fs_LND_loose_subset, aes(wavelength_num, reflectance, color=sensor, fill=sensor)) +
    # geom_density(alpha = 0.05) +
    geom_line() +
   # scale_colour_viridis_d(option = "D") +
    #scale_fill_viridis_d(option = "D") +
    scale_x_continuous(expand = c(0, 0)) +
    #scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
    theme_minimal() +
    guides(col = guide_legend(nrow = 3))+
     theme(legend.position = "bottom", 
           axis.text.y = element_text(angle = 45),
           strip.text.y = element_text(size = 8, angle = 330))  +
    facet_wrap( ~obs_time_place) )
  }

Summary statistics for sensor variance between all sensors

fs_LND_sensor_summary <- fs_LND_long %>% 
  mutate(obs_time_place = paste0( round(doy, digits = -1),"_", year,"_",POINT_ID)) %>% 
  group_by(obs_time_place) %>% 
  filter(n() > 6) %>% 
  group_by(obs_time_place, sensor) %>% 
  summarise(SD = sd(reflectance))
## `summarise()` has grouped output by 'obs_time_place'. You can override using
## the `.groups` argument.
ggplot(fs_LND_sensor_summary, aes(x = SD, y = sensor, fill = stat(x))) +
  geom_density_ridges_gradient(scale = 1, rel_min_height = 0.01) +
  scale_fill_viridis_c(name = "SD", option = "D") +
 # scale_x_continuous(expand = c(0, 0), limits = c(0, 15000)) +
  theme_minimal()
## Picking joint bandwidth of 48.2

scatterplots

fs_LND_scatter <- fs_LND_loose |> 
  select(obs_time_place,sensor,wavelength,reflectance) |> 
  pivot_wider(names_from = sensor, values_from = c(reflectance)) |> 
  rename(reflectance_LND04 = LND04,
         reflectance_LND05 = LND05,
         reflectance_LND07 = LND07,
         reflectance_LND08  = LND08,
         reflectance_LND09= LND09)

Scatterplot of Landsat 7 and 8 specta

fs_LND_scatter_LND_7_8 <- fs_LND_scatter |> select(-reflectance_LND04,-reflectance_LND05, -reflectance_LND09) |> 
  na.omit()

ggplot(fs_LND_scatter_LND_7_8,aes(reflectance_LND07, reflectance_LND08)) +
stat_bin_hex(bins = 100) +
  geom_abline(intercept=1,slope=1, linetype=2) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  annotate("text", x=75, y=105,
           label= paste0("R2 =",round((cor((fs_LND_scatter_LND_7_8$reflectance_LND07), 
                                           (fs_LND_scatter_LND_7_8$reflectance_LND08),
                                           use="complete.obs")^2),2)), size=4.5, hjust=0) + 
  
  annotate("text", x= 75, y=100, hjust=0, size=4.5,
           label= paste0("RMSE ==",round(sqrt(mean(((fs_LND_scatter_LND_7_8$reflectance_LND07)-
                                                    (fs_LND_scatter_LND_7_8$reflectance_LND08))^2,
                                                   na.rm=TRUE)),2)), parse=TRUE)+
  
  annotate("text", x=75, y= 95, size=4.5, hjust=0,
           label=paste0("Bias = ", round((mean((fs_LND_scatter_LND_7_8$reflectance_LND07), na.rm=TRUE) -
                                          mean((fs_LND_scatter_LND_7_8$reflectance_LND08),na.rm=TRUE)),2)))+
  
  annotate("text", x=75, y=90,size=4.5,hjust=0, 
           label=paste0("MAE = ", round(mean(abs((fs_LND_scatter_LND_7_8$reflectance_LND07)-                                          
                                                (fs_LND_scatter_LND_7_8$reflectance_LND08))),2)))

compairson by wavelength

ggplot(fs_LND_scatter, aes(reflectance_LND07, reflectance_LND08)) +
  stat_bin_hex(bins = 100) +
  geom_abline(intercept=1,slope=1, linetype=2) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  facet_wrap(~wavelength, 
             #scales="free"
  ) 
## Warning: Removed 99396 rows containing non-finite values (stat_binhex).

Scatterplot of Landsat 5 and 7 specta

fs_LND_scatter_LND_5_7 <- fs_LND_scatter |> select(-reflectance_LND04,-reflectance_LND08, -reflectance_LND09) |> 
  na.omit()


ggplot(fs_LND_scatter_LND_5_7, aes(reflectance_LND07, reflectance_LND05)) +
stat_bin_hex(bins = 100) +
  geom_abline(intercept=1,slope=1, linetype=2) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() + 
  annotate("text", x=60, y=105,
           label= paste0("R2 =",round((cor((fs_LND_scatter_LND_5_7$reflectance_LND07), 
                                           (fs_LND_scatter_LND_5_7$reflectance_LND05),
                                           use="complete.obs")^2),2)), size=4.5, hjust=0) + 
  
  annotate("text", x= 60, y=100, hjust=0, size=4.5,
           label= paste0("RMSE ==",round(sqrt(mean(((fs_LND_scatter_LND_5_7$reflectance_LND07)-
                                                    (fs_LND_scatter_LND_5_7$reflectance_LND05))^2,
                                                   na.rm=TRUE)),2)), parse=TRUE)+
  
  annotate("text", x=60, y= 95, size=4.5, hjust=0,
           label=paste0("Bias = ", round((mean((fs_LND_scatter_LND_5_7$reflectance_LND07), na.rm=TRUE) -
                                          mean((fs_LND_scatter_LND_5_7$reflectance_LND05),na.rm=TRUE)),2)))+
  
  annotate("text", x=60, y=90,size=4.5,hjust=0, 
           label=paste0("MAE = ", round(mean(abs((fs_LND_scatter_LND_5_7$reflectance_LND07)-
                                                (fs_LND_scatter_LND_5_7$reflectance_LND05))),2))) 

compairson by wavelength

ggplot(fs_LND_scatter, aes(reflectance_LND07, reflectance_LND05)) +
  stat_bin_hex(bins = 100) +
  geom_abline(intercept=1,slope=1, linetype=2) +
  scale_colour_viridis_d(option = "D") +
  scale_fill_continuous(type = "viridis") +
  theme_minimal() +
  facet_wrap(~wavelength, 
             #scales="free"
  ) 
## Warning: Removed 114504 rows containing non-finite values (stat_binhex).

Not fully developed, But here are some additional plots with exact location matches/

compairison of LND 7 and 8

doy agnostic plots for single locations

fs_LND_7_8 <- fs_LND_long %>% 
  filter(sensor %in% c("LND07", "LND08"),
         year == 2020, 
         doy %in% c(93:120),
         POINT_ID %in% c(40763060:40643065)) 


fs_LND_7_8 <- fs_LND_long %>% 
  filter(sensor %in% c("LND07", "LND08"),
         year == 2020, 
         doy %in% c(96:319),
         POINT_ID %in% c(40763060:40764068)) 


# facet wrap by wavelength
ggplot(fs_LND_7_8, aes(reflectance, color=wavelength, fill=wavelength)) +
  geom_density(alpha = 0.05) +
  #geom_jitter() +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  scale_x_continuous(expand = c(0, 0)) +
  #scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
  theme_minimal() +
  guides(col = guide_legend(nrow = 3))+
  theme(legend.position = "bottom")  +
   facet_grid(rows = vars(POINT_ID), cols = vars(sensor))

Exact matches of location and time of observation

fs_LND_exact <- fs_LND_long %>% 
  mutate(obs_time_place = paste0(doy,"_", year,"_",POINT_ID)) %>% 
  group_by(obs_time_place) %>% 
  filter(n() > 6)

unique(fs_LND_exact$sensor)
## [1] "LND07" "LND09"
# facet wrap by wavelength
ggplot(fs_LND_exact[c(8000:15000),], aes(reflectance, color=wavelength, fill=wavelength)) +
  geom_density(alpha = 0.05) +
  #geom_jitter() +
  scale_colour_viridis_d(option = "D") +
  scale_fill_viridis_d(option = "D") +
  scale_x_continuous(expand = c(0, 0)) +
  #scale_y_continuous(expand = c(0, 0), limits = c(0, 0.02)) +
  theme_minimal() +
  guides(col = guide_legend(nrow = 3))+
  theme(legend.position = "bottom")  +
   facet_grid(rows = vars(doy), cols = vars(sensor)) 

Only observation for landsat 7 and 9 exactly match. Visually the correspondence between the spectra is pretty decent though.